import re
import requests
from bs4 import BeautifulSoup
import json
import os
import pymongo
import uuid
from src.to_neo4j import dataToNeo4j
import json


class spiderCheck39():
    """
    Clean the medical-check ("检查") records scraped from 39.net.

    Every record is one JSON file under ``save_path``.  ``single_handle``
    normalises a single record into a flat dict; ``handle_all`` walks all
    records and writes a tab-separated "check name / aliases" listing.
    """

    # Whitespace and full-width colon characters stripped from scraped values.
    _NOISE = re.compile('[\n \r \t  ：\xa0\u3000]')

    def __init__(self):
        # Directory holding one scraped JSON file per check.
        self.save_path = "../file/39/检查1/"
        # Project-local Neo4j helper; the active code paths below do not call it.
        self.Neo = dataToNeo4j()

    def re_sub_key(self, data, name):
        """
        Normalise a raw record key.

        Line breaks, tabs and full-width colons are removed, then every
        occurrence of the check name is dropped so that e.g.
        ``"<name>相关疾病"`` becomes ``"相关疾病"``.

        :param data: raw key text taken from the scraped JSON
        :param name: check name to strip from the key
        :return: the cleaned key
        """
        cleaned = re.sub('[\n\r\t：]', '', data)
        # The name is removed literally.  Using it as a regex pattern (as the
        # code used to) fails for names such as "痰培养+药敏" or
        # "动态血压监测(ABPM)", whose "+" / "(" are regex metacharacters.
        return cleaned.replace(name, '')

    def single_handle(self, file):
        """
        Load one scraped check record and return its cleaned form.

        :param file: path of the JSON file to read (UTF-8)
        :return: dict of normalised keys to cleaned values
        :raises KeyError: if a non-empty record has no ``'检查名字'`` entry
        """
        with open(file, encoding='utf8') as fp:
            data_json = json.load(fp)

        strip_noise = self._NOISE.sub
        temp = {}
        for key, value in data_json.items():
            key_ = self.re_sub_key(key, data_json['检查名字'])
            if isinstance(value, list):
                if '部位' in key_:
                    # Keep the first item of every non-empty entry.
                    temp['部位'] = [element[0] for element in value if element]
                elif '科室' in key_:
                    lines = ''.join(value).replace(key, '').split("\n")
                    depart = [strip_noise('', line) for line in lines if line != '']
                    # Lines made only of noise characters collapse to '' above.
                    temp['科室'] = [element for element in depart if element != '']
                elif '空腹检查' in key_:
                    temp['空腹检查'] = strip_noise('', ''.join(value).replace(key, ''))
                elif '简介' in key_:
                    temp['简介'] = strip_noise('', ''.join(value).replace(key, ''))
                elif key_ == '指标解读':
                    # This section is deliberately left out of the result.
                    continue
                elif ('相关' in key_ or '同类' in key_ or '所属' in key_
                      or key_ == '包含项目'
                      or key_ == '脑脊liquid_placeholder'.replace('脑脊liquid_placeholder',
                                                               '脑脊液培养+药敏试验包含项目')):
                    # Link-style sections.  Presumably each entry is a
                    # 2-item row / dict carrying the label first (or under
                    # 'text'), or a longer row with the label in third
                    # position -- TODO confirm against the crawler output.
                    relevant_value = []
                    for element in value:
                        if len(element) == 2:
                            if isinstance(element, dict):
                                relevant_value.append(element['text'])
                            else:
                                relevant_value.append(element[0])
                        else:
                            relevant_value.append(element[2])
                    temp[key_] = relevant_value
                elif len(value) > 1:
                    # Drop the first entry when the second one is contained
                    # in it, then clean what is left.
                    if value[1] in value[0]:
                        del value[0]
                    temp[key_] = [strip_noise('', element) for element in value]
                elif '医院参考价' not in key_:
                    # Single-entry sections are kept, except hospital price.
                    temp[key_] = [strip_noise('', element) for element in value]
            elif 'type' in key_:
                temp['部位大类'] = value
            elif '别名' in key_:
                raw = (value.replace(key, '')
                       .replace('（', '')
                       .replace('）', '')
                       .replace('：', ''))
                temp['别名'] = [element for element in re.split('[，,]', raw) if element != '']
            else:
                temp[key_] = value
        return temp

    def handle_all(self):
        """
        Clean every record under ``save_path`` and dump its aliases.

        For each record one line ``<check name>\\t<alias>\\t<alias>...`` is
        written to ``jiancha1.txt`` in the current working directory (the
        placeholder alias ``'检查组合'`` is removed once if present) and the
        same data is printed.

        NOTE: the source previously carried disabled (commented-out) code here
        that created Neo4j nodes and relations for aliases, body parts,
        departments, related diseases / symptoms and related checks through
        ``self.Neo``; it was not executed and is not reproduced.

        :return: None
        """
        # ``with`` guarantees the listing is flushed and closed even if a
        # record fails to parse.
        with open("jiancha1.txt", "w", encoding="utf8") as f:
            for file in os.listdir(self.save_path):
                result = self.single_handle(os.path.join(self.save_path, file))
                jiancha = result['检查名字']
                # Records without an alias section yield an empty alias list.
                data1 = result.get('别名', [])
                if '检查组合' in data1:
                    data1.remove('检查组合')
                f.write(jiancha + "\t" + "\t".join(data1) + "\n")
                print(jiancha, data1)

    def my_handle(self):
        """
        Debug helper: clean one fixed record and print the result.

        :return: None
        """
        result = self.single_handle(
            os.path.join(self.save_path, "dff02bc6-f0af-11ea-a9f5-a1601e503816.json"))
        print(result)

# ['相关妇科疾病检查', '相关动脉硬化检查', '相关肺炎检查', '相关血液病检查', '相关健康体检', '相关肺结核检查', '相关乳房检查', '相关甲状腺功能检查', '相关肝炎肝病检查', '相关性病检查', '相关风湿疾病检查', '相关炎症检查', '相关口腔检查', '相关婚检', '相关男科疾病检查', '相关肿瘤检查', '相关过敏检测', '相关产检', '相关糖尿病检查', '相关肾病检查', '相关遗传病检查', '相关骨骼检查', '相关皮肤病检查', '相关高血压检查', '相关耳鼻喉检查', '相关疱疹病毒检查', '相关脑中风检查', '相关眼部检查', '相关胃肠病检查', '相关心脏病检查']

# Script entry point: build the cleaner and process every record under save_path.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this also
# runs whenever the module is imported.
spiderCheck39().handle_all()

# 48dc62d0-f0b1-11ea-9c70-a1601e503816.json
# 8e866db0-f0b1-11ea-bc26-a1601e503816.json
# dff02bc6-f0af-11ea-a9f5-a1601e503816.json
# f5adbd4a-f0b0-11ea-a64f-a1601e503816.json -- followed by names (presumably check names; each contains "+" or parentheses): 血浆凝血酶原片段1+2 动态血压监测(ABPM) 血清抗SS-A(Ro)抗体 血清抗SS-B(La)抗体 痰培养+药敏 并指(趾)畸形检查 并指(趾)畸形检查


# dff02bc6-f0af-11ea-a9f5-a1601e503816.json
# f5adbd4a-f0b0-11ea-a64f-a1601e503816.json


# 00bdbb80-f0b0-11ea-bf54-a1601e503816.json
# 00e0ef0c-f0b0-11ea-98e6-a1601e503816.json
# 012abf58-f0b0-11ea-b739-a1601e503816.json
# 015ec992-f0b0-11ea-b3b2-a1601e503816.json
# 046ae3f0-f0b1-11ea-8709-a1601e503816.json
# 0c3193ec-f0b1-11ea-a52f-a1601e503816.json
# 0d9489de-f0b1-11ea-ad43-a1601e503816.json
# 0e3e26c8-f0b0-11ea-b82f-a1601e503816.json
# 0eb3381e-f0b0-11ea-b725-a1601e503816.json
# 16104f38-f0b0-11ea-942b-a1601e503816.json
# 1971a430-f0b1-11ea-801d-a1601e503816.json
# 19a55b2e-f0b1-11ea-baf5-a1601e503816.json
# 23090d9e-f0b1-11ea-92ea-a1601e503816.json
# 23f90336-f0b1-11ea-b377-a1601e503816.json
# 25088db6-f0b1-11ea-84e0-a1601e503816.json
